The code of the auto-encoder for general image reconstruction is from https://github.smu.edu/48066464/CS8321Lab2_Decoder, whose author is Yihao Wang. Specifically, he uses the VGG-19 model as the encoder and trains five VGG-19 decoders, each based on a different block of VGG-19. All model information is in the GitHub repository https://github.smu.edu/48066464/CS8321Lab2_Decoder and SMU Box https://smu.box.com/s/z8wcufal7kgxgh7n4dbkp1vseasy0xjd
For questions 2 and 3, we choose three different decoders, based on VGG-19 block 2, block 4 and block 5 respectively, to see how well each decoder performs. It turns out that decoders based on earlier blocks perform better. As we can see, block 2 almost perfectly restores the original image, while block 5 can only decode the edges of an object in the original picture. There are many small squares in the first two reconstructed pictures, from Decoder5 and Decoder4, which means those decoders cannot recover each pixel's features very well.
from pathlib import PurePath
import tensorflow as tf
from skimage.transform import resize
from keras.preprocessing import image
import matplotlib.pyplot as plt
# --- Reconstruction demo: compare the block-5/4/2 decoders on two images. ---
# Load each trained decoder exactly once; the original cell reloaded all
# three models from disk separately for each input image, which is redundant.
ModelBlock5 = tf.keras.models.load_model(str(PurePath('Model', 'Block5_Model')), compile=False)
ModelBlock4 = tf.keras.models.load_model(str(PurePath('Model', 'Block4_Model')), compile=False)
ModelBlock2 = tf.keras.models.load_model(str(PurePath('Model', 'Block2_Model')), compile=False)

def _show_reconstructions(img_path):
    """Load an image, reconstruct it with each decoder, and plot the results.

    Shows the original plus the outputs of the block-5, block-4 and block-2
    auto-encoders side by side in one row of four subplots.
    """
    img = image.load_img(img_path)
    img = image.img_to_array(img) / 255  # scale pixel values to [0, 1]
    img = tf.convert_to_tensor(resize(img, (384, 384, 3)))
    # Models expect a 4-D (batch, H, W, C) tensor.
    batch = tf.expand_dims(img, 0)
    plt.figure(figsize=(15, 15))
    plt.subplot(1, 4, 1)
    plt.title('Original')
    plt.imshow(img)
    for col, (title, decoder) in enumerate(
            [('Decoder5', ModelBlock5),
             ('Decoder4', ModelBlock4),
             ('Decoder2', ModelBlock2)], start=2):
        out = decoder(batch)
        plt.subplot(1, 4, col)
        plt.title(title)
        plt.imshow(out.numpy()[0])
    plt.show()

_show_reconstructions("image/elephant.jpeg")
_show_reconstructions("image/starry.jpeg")
We show four pictures generated by the WCT process, and the new stylized pictures look quite good.
#### !/usr/bin/env python
# -*- coding: utf-8 -*-
import tensorflow as tf
from pathlib import PurePath
import sys
from keras.preprocessing import image
from keras.models import load_model
from keras import backend as K
import numpy as np
class VGG19AE(tf.keras.Model):
    """VGG-19 based multi-level style-transfer auto-encoder.

    The decoder code is from
    https://github.smu.edu/48066464/CS8321Lab2_Decoder
    Stylization cascades from the coarsest level (block 5) down to block 2,
    applying a Whiten-Color Transform (WCT) at every level, following the
    Universal Style Transfer paper (https://arxiv.org/pdf/1705.08086.pdf).
    """

    def __init__(self, files_path):
        """Load the pre-trained block models found under `files_path` and keep
        their encoder (E), decoder (D) and output (O) sub-models as attributes."""
        super(VGG19AE, self).__init__()
        # Each saved model is [encoder, decoder, output] (block 1 has no decoder).
        ModelBlock5 = tf.keras.models.load_model(str(PurePath(files_path, 'Block5_Model')), compile = False)
        self.E5 = ModelBlock5.layers[0]
        self.D5 = ModelBlock5.layers[1]
        self.O5 = ModelBlock5.layers[2]
        ModelBlock4 = tf.keras.models.load_model(str(PurePath(files_path, 'Block4_Model')), compile = False)
        self.E4 = ModelBlock4.layers[0]
        self.D4 = ModelBlock4.layers[1]
        self.O4 = ModelBlock4.layers[2]
        ModelBlock3 = tf.keras.models.load_model(str(PurePath(files_path, 'Block3_Model')), compile = False)
        self.E3 = ModelBlock3.layers[0]
        self.D3 = ModelBlock3.layers[1]
        self.O3 = ModelBlock3.layers[2]
        ModelBlock2 = tf.keras.models.load_model(str(PurePath(files_path, 'Block2_Model')), compile = False)
        self.E2 = ModelBlock2.layers[0]
        self.D2 = ModelBlock2.layers[1]
        self.O2 = ModelBlock2.layers[2]
        ModelBlock1 = tf.keras.models.load_model(str(PurePath(files_path, 'Block1_Model')), compile = False)
        self.E1 = ModelBlock1.layers[0]
        self.O1 = ModelBlock1.layers[1]

    def call(self, Image, training = False):
        """Stylize a (content, style) pair of 4-D (1, H, W, C) tensors.

        Returns the stylized image squeezed to (H, W, C), clipped to [0, 1].
        """
        xo, I2 = Image
        # Level 5: encode both images, transfer style statistics, decode.
        x = self.E5(xo)
        style = self.E5(I2)
        x = self.wct(x, style)
        x = self.D5(x)
        x = self.O5(x)
        # Level 4: re-encode the previous level's output.
        x = self.E4(x)
        style = self.E4(I2)
        x = self.wct(x, style)
        x = self.D4(x)
        x = self.O4(x)
        # Level 3.
        x = self.E3(x)
        style = self.E3(I2)
        x = self.wct(x, style)
        x = self.D3(x)
        x = self.O3(x)
        # Level 2. BUG FIX: the original encoded `xo` (the raw content image)
        # here, discarding the stylization performed by levels 5-3; every other
        # level feeds the previous level's output forward, so do the same here.
        x = self.E2(x)
        style = self.E2(I2)
        x = self.wct(x, style)
        x = self.D2(x)
        x = self.O2(x)
        # Block 1 has no decoder: it contains a single conv layer and no pooling.
        return tf.clip_by_value(tf.squeeze(x), 0, 1)

    def wct(self, content, style, alpha=0.7, eps=1e-5):
        '''
        Perform the Whiten-Color Transform on feature maps using numpy.
        Adapted from https://github.com/eridgd/WCT-TF/blob/master/ops.py
        See p.4 of the Universal Style Transfer paper for the equations:
        https://arxiv.org/pdf/1705.08086.pdf

        `content` and `style` are 1xHxWxC feature maps; `alpha` blends the
        stylized features with the original content features; `eps`
        regularizes the covariance eigenvalues. Returns a float32 1xHxWxC map.
        '''
        # 1xHxWxC -> CxHxW
        content_t = np.transpose(np.squeeze(content), (2, 0, 1))
        style_t = np.transpose(np.squeeze(style), (2, 0, 1))
        # CxHxW -> CxH*W (-1 infers the number of rows automatically)
        content_flat = content_t.reshape(-1, content_t.shape[1]*content_t.shape[2])
        style_flat = style_t.reshape(-1, style_t.shape[1]*style_t.shape[2])
        # Whitening transform: center the content features ...
        mc = content_flat.mean(axis=1, keepdims=True)
        fc = content_flat - mc
        fcfc = np.dot(fc, fc.T) / (content_t.shape[1]*content_t.shape[2] - 1)
        # ... then rescale along the covariance eigenbasis (via SVD),
        # dropping near-zero eigenvalues for numerical stability.
        Ec, wc, _ = np.linalg.svd(fcfc)
        k_c = (wc > 1e-5).sum()
        Dc = np.diag((wc[:k_c]+eps)**-0.5)
        fc_hat = Ec[:,:k_c].dot(Dc).dot(Ec[:,:k_c].T).dot(fc)
        # Coloring transform: impose the style covariance and mean.
        ms = style_flat.mean(axis=1, keepdims=True)
        fs = style_flat - ms
        fsfs = np.dot(fs, fs.T) / (style_t.shape[1]*style_t.shape[2] - 1)
        Es, ws, _ = np.linalg.svd(fsfs)
        k_s = (ws > 1e-5).sum()
        Ds = np.sqrt(np.diag(ws[:k_s]+eps))
        fcs_hat = Es[:,:k_s].dot(Ds).dot(Es[:,:k_s].T).dot(fc_hat)
        fcs_hat = fcs_hat + ms
        # Blend transformed features with the original content features.
        blended = alpha*fcs_hat + (1 - alpha)*(fc)
        # CxH*W -> CxHxW
        blended = blended.reshape(content_t.shape)
        # CxHxW -> 1xHxWxC
        blended = np.expand_dims(np.transpose(blended, (1,2,0)), 0)
        return np.float32(blended)
def loadIMAGE(url_content, url_style, Model):
    """Stylize one content/style image pair and plot the result.

    1. Load both images, scale them to [0, 1] and resize to 384x384.
    2. Run them through `Model` (the multi-level WCT auto-encoder).
    3. Plot content, style and stylized result side by side.

    Returns (content_image, stylized_output).
    """
    import matplotlib.pyplot as plt
    # NOTE: the original had `%matplotlib inline` here; IPython magics are not
    # valid Python inside a function body -- run it once at the notebook top.
    img_c = image.load_img(url_content)
    img_c = image.img_to_array(img_c)/255
    img_c = resize(img_c, (384,384))
    img_s = image.load_img(url_style)
    img_s = image.img_to_array(img_s)/255
    img_s = resize(img_s, (384,384))
    Image_sample_input = (tf.expand_dims(img_c, 0), tf.expand_dims(img_s, 0))
    # BUG FIX: use the `Model` parameter; the original called the global
    # `model`, silently ignoring the argument.
    Out_image = Model(Image_sample_input)
    # BUG FIX: figsize is in *inches*; (384, 384) would create an absurdly
    # large figure. (15, 5) fits three square subplots in a row.
    plt.figure(figsize = (15, 5))
    plt.subplot(1,3,1)
    plt.title("content")
    plt.imshow(img_c)
    plt.subplot(1,3,2)
    plt.title("style")
    plt.imshow(img_s)
    plt.subplot(1,3,3)
    plt.title("result")
    plt.imshow(Out_image)
    plt.show()
    return img_c, Out_image
# Build the multi-level WCT model once, then stylize four content/style pairs.
model = VGG19AE('Model')
_pairs = [
    ("image/sky.jpg", "image/dongman1.jpg"),
    ("image/cityofSky.jpg", "image/superJumbo.jpg"),
    ("image/shashenwan.png", "image/style1.png"),
    ("image/timesquareday.jpg", "image/timesquarenight.jpg"),
]
(content_img_1, out_img_1), (content_img_2, out_img_2), \
    (content_img_3, out_img_3), (content_img_4, out_img_4) = \
    [loadIMAGE(c, s, model) for c, s in _pairs]
We show three pictures generated by WCT + smoothing. We can notice that the real photo (Times Square) performs better than the other, less realistic photos. However, in general, none of them perform very well: the desired photo style stays close to the initial content image. (The comments about the code are in the code box.)
"""
This code is from
https://github.com/NVIDIA/FastPhotoStyle/blob/master/photo_smooth.py
The original paper is:
https://arxiv.org/pdf/1802.06474.pdf
"""
from __future__ import division
#import torch.nn as nn
#from keras.models import Model
import scipy.misc
import cv2
import numpy as np
import scipy.sparse
import scipy.sparse.linalg
from numpy.lib.stride_tricks import as_strided
from PIL import Image
class Propagator():
    """Photorealistic smoothing step from NVIDIA FastPhotoStyle.

    Adapted from
    https://github.com/NVIDIA/FastPhotoStyle/blob/master/photo_smooth.py
    (paper: https://arxiv.org/pdf/1802.06474.pdf).
    Solves a closed-form matting-Laplacian system so that the stylized image
    varies smoothly wherever the guiding content image is smooth.
    """

    def __init__(self, beta=0.9999):
        # beta balances smoothness (the Laplacian term) against fidelity to
        # the stylized input; values close to 1 favour smoothness.
        super(Propagator, self).__init__()
        self.beta = beta

    def process(self, initImg, contentImg):
        """Return the smoothed version of `initImg`, guided by `contentImg`.

        Computes the closed-form solution R = (1 - beta) * (I - beta*S)^-1 * B
        where S is the normalized matting-Laplacian affinity of the content
        image and B is the (cropped, padded) stylized image.
        Both arguments may be file paths or image arrays; the result is an
        HxWx3 float array clipped to [0, 1].
        """
        # Load the content image and the image generated by the WCT process.
        if type(contentImg) == str:
            content = image.load_img(contentImg, color_mode='rgb')
            content = image.img_to_array(content)
        else:
            content = contentImg.copy()
        if type(initImg) == str:
            B = image.load_img(initImg, color_mode='rgb')
            B = image.img_to_array(B)
        else:
            B = np.asarray(initImg)
        # Crop a 2-pixel border off the stylized image; the replication
        # padding below restores the original h1 x w1 size.
        h1,w1,k = B.shape
        h = h1 - 4
        w = w1 - 4
        B = B[int((h1-h)/2):int((h1-h)/2+h),int((w1-w)/2):int((w1-w)/2+w),:]
        # NOTE: cv2.resize takes the target size as (width, height).
        content = cv2.resize(content, (w,h))
        # Pad both images so border pixels keep full local windows later on.
        B = self.__replication_padding(B,2)
        content = self.__replication_padding(content,2)
        content = content.astype(np.float64)
        B = np.reshape(B,(h1*w1,k))
        # Sparse matting-Laplacian affinity W of the content image.
        W = self.__compute_laplacian(content)
        W = W.tocsc()  # compressed sparse column format for fast arithmetic
        # D^(-1/2) from the row sums, to normalize the affinity.
        dd = W.sum(0)
        dd = np.sqrt(np.power(dd,-1))
        dd = dd.A.squeeze()
        D = scipy.sparse.csc_matrix((dd, (np.arange(0,w1*h1), np.arange(0,w1*h1))))
        S = D.dot(W).dot(D)  # normalized Laplacian matrix D^-1/2 W D^-1/2
        A = scipy.sparse.identity(w1*h1) - self.beta*S  # (I - beta*S)
        A = A.tocsc()
        # Pre-factorize A once and solve the system per color channel.
        solver = scipy.sparse.linalg.factorized(A)
        V = np.zeros((h1*w1,k))
        V[:,0] = solver(B[:,0])
        V[:,1] = solver(B[:,1])
        V[:,2] = solver(B[:,2])
        # Closed-form solution of R_hat; undo the padding before returning.
        V = V*(1-self.beta)
        V = V.reshape(h1,w1,k)
        V = V[2:2+h,2:2+w,:]
        img = np.clip(V , 0, 1.0)
        return img

    # Returns the sparse matting laplacian.
    # The implementation of the function is heavily borrowed from
    # https://github.com/MarcoForte/closed-form-matting/blob/master/closed_form_matting.py
    def __compute_laplacian(self, img, eps=10**(-7), win_rad=1):
        """
        This is the matting affinity:
        the affinity between two pixels is based on the means
        and variances of pixels in a local window around them.
        """
        win_size = (win_rad*2+1)**2
        h, w, d = img.shape
        c_h, c_w = h - 2*win_rad, w - 2*win_rad
        win_diam = win_rad*2+1
        indsM = np.arange(h*w).reshape((h, w))
        ravelImg = img.reshape(h*w, d)
        # Rolling windows for the later convolution-like calculation.
        win_inds = self.__rolling_block(indsM, block=(win_diam, win_diam))
        win_inds = win_inds.reshape(c_h, c_w, win_size)
        winI = ravelImg[win_inds]
        # Mean of each local window.
        win_mu = np.mean(winI, axis=2, keepdims=True)
        # Covariance of each local window: E[x x^T] - mu mu^T.
        win_var = np.einsum('...ji,...jk ->...ik', winI, winI)/win_size - np.einsum('...ji,...jk ->...ik', win_mu, win_mu)
        inv = np.linalg.inv(win_var + (eps/win_size)*np.eye(3))
        X = np.einsum('...ij,...jk->...ik', winI - win_mu, inv)
        vals = (1/win_size)*(1 + np.einsum('...ij,...kj->...ik', X, winI - win_mu))
        # Scatter the per-window values into a (h*w, h*w) sparse matrix;
        # coo_matrix sums duplicate (row, col) entries automatically.
        nz_indsCol = np.tile(win_inds, win_size).ravel()
        nz_indsRow = np.repeat(win_inds, win_size).ravel()
        nz_indsVal = vals.ravel()
        L = scipy.sparse.coo_matrix((nz_indsVal, (nz_indsRow, nz_indsCol)), shape=(h*w, h*w))
        return L

    def __replication_padding(self, arr,pad):
        """
        Pad the image with `pad` edge-replicated pixels on every side so no
        image information is lost in the later windowed operations.
        """
        h,w,c = arr.shape
        ans = np.zeros((h+pad*2,w+pad*2,c))
        for i in range(c):
            ans[:,:,i] = np.pad(arr[:,:,i],pad_width=(pad,pad),mode='edge')
        return ans

    def __rolling_block(self, A, block=(3, 3)):
        """
        Roll the local windows so that we can do the convolution-like
        calculation (computing mu and sigma^2 per window) without copies.
        """
        shape = (A.shape[0] - block[0] + 1, A.shape[1] - block[1] + 1) + block
        strides = (A.strides[0], A.strides[1]) + A.strides
        # Create a zero-copy view into the array with the given shape/strides.
        return as_strided(A, shape=shape, strides=strides)
# Smooth each stylized output with the matting-Laplacian propagator and show
# smoothed result / raw WCT output / original content side by side.
for _content, _stylized in [(content_img_4, out_img_4),
                            (content_img_2, out_img_2),
                            (content_img_3, out_img_3)]:
    new_image = Propagator(beta=0.999999).process(_content, _stylized.numpy())
    plt.figure(figsize = (20,10))
    plt.subplot(1, 3, 1)
    plt.imshow(new_image)
    plt.subplot(1, 3, 2)
    plt.imshow(_stylized)
    plt.subplot(1, 3, 3)
    plt.imshow(_content)
    plt.show()